#> ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
#> ✔ dplyr 1.1.2 ✔ readr 2.1.4
#> ✔ forcats 1.0.0 ✔ stringr 1.5.0
#> ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
#> ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
#> ✔ purrr 1.0.1
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag() masks stats::lag()
#> ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Tidyverse
1 Tidyverse Challenge
Analyze the sales by location (state) with a bar plot. Since state and city are multiple features (variables), they should be split. Which state has the highes revenue? Replace your bike_orderlines_wrangled_tbl object with the newly wrangled object (with the columns state and city).
2 Running Code
2.0 Load libraries
2.1 Importing Files
bikes_tbl <- read_excel(path ="C:/Users/arpit/OneDrive/Documents/GitHub/repo/ss23-bdsb-arpitaprayag/assets/data/ds_data/ds_data/01_bike_sales/01_raw_data/bikes.xlsx")
orderlines_tbl <- read_excel(path = "C:/Users/arpit/OneDrive/Documents/GitHub/repo/ss23-bdsb-arpitaprayag/assets/data/ds_data/ds_data/01_bike_sales/01_raw_data/orderlines.xlsx")#> New names:
#> • `` -> `...1`
2.2 Examining Data
2.3 Joining Data
2.4 Wrangling Data
bike_states_wrangled_tbl <- bike_orderlines_joined_tbl %>%
# 5.1 Separate state and city name
separate(location, into = c("city", "state"), sep = ", ") %>%
# 5.2 Add the total price (price * quantity)
# Add a column to a tibble that uses a formula-style calculation of other columns
mutate(total.price = price * quantity)
sales_by_states_tbl <- bike_states_wrangled_tbl %>%
# Select columns and add a year
select(order.date, total.price, state) %>%
mutate(year = year(order.date)) %>%
# Group by and summarize year and states
group_by(year, state) %>%
summarise(sales = sum(total.price)) %>%
ungroup() %>%
# Format $ Text
mutate(sales_text = scales::dollar(sales, big.mark = ".",
decimal.mark = ",",
prefix = "",
suffix = " €"))#> `summarise()` has grouped output by 'year'. You can override using the
#> `.groups` argument.
3 Visualize
sales_by_states_tbl %>%
# Set up x, y, fill
ggplot(aes(x = year, y = sales, fill = state)) +
# to your plotting code to rotate your x-axis labels
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
# Geometries
geom_col(fill = "#2DC6D6") + # Use geom_col for a bar plot
geom_label(aes(label = sales)) + # Adding labels to the bars
geom_smooth(method = "lm", se = FALSE) + # Adding a trendline
# Facet
facet_wrap(~ state) +
# Formatting
scale_y_continuous(labels = scales::dollar_format(big.mark = ".",
decimal.mark = ",",
prefix = "",
suffix = " €")) +
labs(
title = "Revenue by state",
subtitle = "North Rhine-Westphalia has highest revenue",
fill = "States" # Changes the legend name
)#> `geom_smooth()` using formula = 'y ~ x'
